# Load ------------------------------------------------------
library(tidyverse)
library(here)
library(ggiraph)
library(patchwork)
library(ggtext)
library(janitor)
ggplot2::theme_set(theme_bw() + theme(
legend.text = element_markdown(family = 'IBM Plex Sans', colour = 'gray30'),
axis.text = element_markdown(family = 'IBM Plex Sans', colour = 'gray30'),
axis.title = element_markdown(family = 'IBM Plex Sans',colour = 'gray30'),
))
hrbrthemes::import_plex_sans()
source(here('R/report_plots.R'))
# Functions --------------------------------------------------
# creates yyyy-ww label for grouping data
get_date_week <- function(x){
y <- lubridate::year(x)
w <- lubridate::week(x) |> str_pad(2, 'left', 0)
str_glue("{y}-{w}")
}
# get biweekly bin date for vector of dates
get_date_biweekly <- function(x){
day_of_week <- lubridate::wday(x)
case_when(
day_of_week %in% 1:3 ~ x + 3 - day_of_week,
TRUE ~ x + 5 - day_of_week,
)
}
# dates 2021-2022 with biweekly groups
biweekly_date_sequence <- function(){
tibble(
date = seq(
as_date('2021-01-01'),
as_date('2022-12-31'),
by = 1)
) |>
mutate(biweek = get_date_biweekly(date))
}
# lookup table for date
week_midpoint_date_lookup <- function(start = '2021-01-01', end = '2023-01-01'){
ends = lubridate::as_date(c(start, end))
tibble(
date = seq(ends[1], ends[2], by = 1),
date_week = lubridate::week(date) |> str_pad(2, 'left', '0'),
date_year = lubridate::year(date),
week = str_glue("{date_year}-{date_week}"),
) |>
group_by(week) |>
summarise(date = mean(date))
}
binom_ci <- function(x, n){
Hmisc::binconf(x, n, method = 'wilson', alpha = 0.05) |>
as_tibble() |>
janitor::clean_names()
}
get_binom_ci <- function(data){
data |>
summarise(
x = sum(pcr_positive == 'Positive'),
n = n(),
binconf = map2(x, n, ~binom_ci(.x, .y))
) |>
unnest(binconf) |>
mutate(across(c(point_est, lower, upper), ~(.x*100) |> round(1)))
}
# Plot Elements ----------------------------------------------
theme_color <- 'cornflowerblue'
# layout x-axis
scale_x_study_dates <- function(){
scale_x_date(
date_breaks = 'month',
date_labels = month.name[c(9:12, 1:5)] |> strtrim(3) |>
paste0(' ', c(rep(2021, 4), rep(2022, 5))),
limits = c(
min(swabs_tidy$date_swab),
max(swabs_tidy$date_swab)
))
}
# get rid of x-axis for vertically stacked plots
remove_x_labels <- function(){
theme(
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_blank()
)
}
# no minor grid and adjust spacing for multipanel
remove_grid <- function(){
theme(
axis.title.y = element_blank(),
panel.grid.minor = element_blank(),
plot.title.position = 'panel',
plot.title = element_markdown(
vjust = -1,
size = 8,
face = 'bold',
family = 'IBM Plex Sans',
colour = 'gray50',
margin = margin(0, 0, 0, 0)),
plot.margin = unit(c(0, 0, 0, 0), 'mm'))
}
# Data -------------------------------------------------------
# swab data
swabs <-
read_rds(here('data/cube.rds')) |>
filter(str_detect(site, '^UO_'))
# filtered swabs without controls or sponge samples
swabs_tidy <- swabs |>
filter(!negative_control,
swab_type != 'sponge',
date_swab < '2022-04-27') |>
select(date_swab, site, floor, location, pcr_positive, pcr_ct, co2) |>
mutate(biweek = get_date_biweekly(date_swab))
# lookup table for waste water site names
lookup_ww <- tribble(
~site_abbr, ~site,
'UO_AA', 'Annex Residence',
'UO_FA', 'Faculty of Social Sciences',
'UO_FT', 'Friel Residence',
'UO_NA', 'Northern sampling site',
'UO_RGN', 'Roger Guindon Hall',
'UO_ST', 'Southern sampling site',
'UO_TBT', 'Tabaret Hall'
)
# UOttawa waste water data from R. Delatolla
uottawa_ww <-
readxl::read_excel(here::here('data/ww_university.xlsx')) |>
janitor::clean_names() |>
mutate(sample_date = as_date(sample_date)) |>
filter(
sample_date > '2021-09-15',
sample_date < '2022-05-05',
!is.na(viral_copies_per_copies_pep_avg),
site != 'UO_RGN'
) |>
left_join(lookup_ww, by = c('site' = 'site_abbr')) |>
mutate(
source =
source_name |>
str_remove('^Data - uOttawa - ') |>
str_remove('.xlsx$')
) |>
transmute(site,
sample_date,
signal = viral_copies_per_copies_pep_avg)
# ottawa wastewater data: daily means
regional_ww <-
read_rds(here('data/ww_ottawa.rds')) |>
select(sample_date, starts_with('cov_')) |>
pivot_longer(contains('cov_')) |>
mutate(target = str_extract(name, 'cov_n.'),
stat = str_extract(name, 'mean|sd'),
) |>
select(-name) |>
pivot_wider(names_from = stat, values_from = value) |>
mutate(.lower = mean - sd, .upper = mean + sd)
# data from university of ottawa
wifi <-
read_rds(here('data/uo_wifi.rds')) |>
filter(date >= min(swabs$date_swab),
date <= max(swabs$date_swab))
# important dates for context
event_dates <-
tribble(
~event, ~start, ~end,
'Reading Week', '2021-10-24', '2021-10-30',
'Holiday Break', '2021-12-22', '2022-01-04',
'Closure', '2022-01-04', '2022-01-31',
'Closure', '2022-02-16', '2022-02-21',
'Reading Week', '2022-02-20', '2022-02-26',
) |>
mutate(across(start:end, as_date))
# UOttawa cases data
cases <-
read_rds(here('data/cases_rule_imputed.rds')) |>
select(case, role, case_date, UC:TBT) |>
mutate(biweek = get_date_biweekly(case_date)) |>
nest(associated_sites = UC:TBT)
# Summaries --------------------------------------------------
positivity_summary <- function(swabs){
swabs |>
summarise(
n = n(),
x = sum(pcr_positive == 'Positive', na.rm = F),
positivity = round(100 * x / n, digits = 1),
.groups = 'drop')
}
co2_summary <- function(swabs){
swabs |>
summarise(
n = n(),
co2_vals = list(co2),
co2_mean = mean(co2, na.rm = T),
.groups = 'drop')
}
# overall summary
swab_summary <-
swabs_tidy |> positivity_summary()
# site summary
swab_summary_sites <- swabs_tidy |>
group_by(site) |>
positivity_summary()
# high-low traffic summary
swab_summary_location_traffic <- swabs_tidy |>
mutate(traffic = if_else(
str_detect(location, 'Site 1'), 'high traffic', 'low traffic'
)) |>
group_by(traffic) |>
positivity_summary()
## Figure 1: data aggregation ----
# uottawa cases - biweekly counts
cases_biweekly <-
cases |>
select(case, biweek, role) |>
group_by(biweek) |>
summarise(cases = n(),
cases_student = sum(role == 'Student'),
cases_staff = sum(role == 'Employee'),
) |>
right_join(
biweekly_date_sequence() |>
filter(biweek < '2022-02-03') |>
distinct(biweek),
by = 'biweek'
) |>
mutate(across(where(is.integer), ~if_else(is.na(.), 0L, .)))
# swabs biweekly summary
swabs_biweekly <-
swabs_tidy |>
group_by(biweek) |>
positivity_summary() |>
left_join(swabs_tidy |> group_by(biweek) |> co2_summary())
# swabs biweekly summary by site
swabs_biweekly_by_site <-
swabs_tidy |>
group_by(site, biweek) |>
positivity_summary() |>
left_join(swabs_tidy |> group_by(site, biweek) |> co2_summary())
# UOttawa ww biweekly means
uottawa_ww_biweekly <-
uottawa_ww |>
mutate(biweek = get_date_biweekly(sample_date)) |>
group_by(biweek) |>
summarise(signal = mean(signal),
n = n())
# daily ww data for study period
regional_ww_daily <-
regional_ww |>
filter(
sample_date >= min(swabs_tidy$date_swab) - 4,
sample_date <= max(swabs_tidy$date_swab) + 4,
) |>
group_by(sample_date) |>
summarise(mean = mean(mean))
# regional ww biweekly means
regional_ww_biweekly <-
regional_ww_daily |>
mutate(biweek = get_date_biweekly(sample_date)) |>
group_by(biweek) |>
summarise(mean = mean(mean))
# wifi site agg
wifi_extended <- read_rds(here('data/uo_wifi.rds')) |>
filter(date >= min(swabs$date_swab) - 2,
date <= max(swabs$date_swab) + 2) |>
mutate(biweek = get_date_biweekly(date))
# overall wifi biweekly timeseries
wifi_biweekly <-
wifi_extended |>
group_by(biweek) |>
summarise(
n = n(),
min = min(clients, na.rm = T),
mean = mean(clients, na.rm = T),
max = max(clients, na.rm = T)
)
# per building wifi biweekly timeseries
wifi_biweek_sites <-
wifi_extended |>
group_by(biweek, site) |>
summarise(
n = n(),
min = min(clients),
mean = mean(clients),
max = max(clients)
)
# Map --------------------------------------------------------
swab_coords <- tribble(
~site, ~name, ~lat, ~lng,
'MRT', 'Morrisette Hall (MRT)', 45.423239101483546, -75.68428970961207,
'MNT', 'Montpetit', 45.42254171025894, -75.68265871464943,
'90U', '90 University', 45.42242555707402, -75.6850144991752,
'DMS', 'Desmarais Bldg.', 45.42387679340988, -75.68727075448753,
'TBT', 'Tabaret Hall', 45.42450940162542, -75.68631900184072,
'LPR', 'Louis Pasteur Bldg.', 45.421375668614466, -75.68026389993058,
)
ww_coords <- tribble(
~site, ~name, ~lat, ~lng,
'aa', 'Annex Residence', 45.42678129026445, -75.68034405960745,
'fa', 'Faculty of Soc. Sci.', 45.421624722628515, -75.68390386012699,
'ft', 'Friel Residence', 45.43043440080566, -75.68290766533741,
'tbt', 'Tabaret Hall', 45.42450940162542, -75.68631900184072
# 'na', 'North sampling Site', NA, NA,
# 'st', 'South Sampling Site', NA, NA, # Nobody knows where this is
)
figure_sites_map <-
leaflet::leaflet(swab_coords, width = 600, height = 330) |>
leaflet::addProviderTiles(leaflet::providers$CartoDB.Positron) |>
leaflet::setView(lng = -75.6843, lat = 45.4235, zoom = 14) |>
leaflet::addCircleMarkers(~lng, ~lat, label = ~name, popup = ~name,
radius = 2, color = '#440099') |>
leaflet::addLabelOnlyMarkers(
~lng, ~lat, label = ~site,
labelOptions = leaflet::labelOptions(
noHide = T, direction = 'top', textOnly = T,
)) |>
leaflet::addCircleMarkers(~lng, ~lat, label = ~site,
radius = 2, color = '#440099') |>
leaflet::addCircleMarkers(
data = ww_coords, ~lng, ~lat, label = ~name, popup = ~name,
radius = 4, color = '#f96999')
# Results ----------------------------------------------------
rs_data <- lst(
swabs_collected = nrow(swabs_tidy),
positivity_ovr = swab_summary$positivity,
positivity_site_min = min(swab_summary_sites$positivity),
positivity_site_max = max(swab_summary_sites$positivity),
)
rs_abstract <- rs_data |>
glue::glue_data(
"Over the 32-week study period, we collected {swabs_collected} swabs at six university buildings, including two buildings where wastewater surveillance was currently ongoing.
Overall, {positivity_ovr}% of swabs were PCR-positive for SARS-CoV-2, with individual buildings ranging between {positivity_site_min}% and {positivity_site_max}%.
*Comment on when spikes in cases, swabs, and waste-water occurred….* *Comment on prediction of cases using swabs, ww, or combined approach…*
"
)
rs_results <- rs_data |>
glue::glue_data()